Further fixes to the TLB-flush logic: fold the clock tick into write_cr3(), and use the clock's least-significant bit to serialize epoch changes (the CPU that half-ticks the clock becomes the epoch-change leader).
/* Switch page tables. */
write_ptbase(&next_p->mm);
- tlb_clocktick();
}
if ( unlikely(prev_p->io_bitmap != NULL) ||
#include <xen/softirq.h>
#include <asm/flushtlb.h>
-unsigned long tlbflush_epoch_changing;
u32 tlbflush_clock;
u32 tlbflush_time[NR_CPUS];
-void tlb_clocktick(void)
+void write_cr3(unsigned long cr3)
{
- u32 y, ny;
+ u32 t, t1, t2;
unsigned long flags;
local_irq_save(flags);
- /* Tick the clock. 'y' contains the current time after the tick. */
- ny = tlbflush_clock;
+ /*
+ * Tick the clock, which is incremented by two each time. The L.S.B. is
+ * used to decide who will control the epoch change, when one is required.
+ */
+ t = tlbflush_clock;
do {
-#ifdef CONFIG_SMP
- if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) )
+ t1 = t; /* t1: Time before this clock tick. */
+ t2 = t + 2; /* t2: Time after this clock tick. */
+ if ( unlikely(t2 & 1) )
{
- /* Epoch is changing: the first to detect this is the leader. */
- if ( unlikely(!test_and_set_bit(0, &tlbflush_epoch_changing)) )
- raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
- /* The clock doesn't tick again until end of the epoch change. */
- y--;
- break;
+ /* Epoch change: someone else is leader. */
+ t2 = t; /* no tick */
+ goto skip_clocktick;
+ }
+ else if ( unlikely((t2 & TLBCLOCK_EPOCH_MASK) == 0) )
+ {
+ /* Epoch change: we may become leader. */
+ t2--; /* half tick */
}
-#else
- y = ny+1;
-#endif
}
- while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) );
+ while ( unlikely((t = cmpxchg(&tlbflush_clock, t1, t2)) != t1) );
+
+ /* Epoch change: we are the leader. */
+ if ( unlikely(t2 & 1) )
+ raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
+
+ skip_clocktick:
+ __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (cr3) : "memory" );
/* Update this CPU's timestamp to new time. */
- tlbflush_time[smp_processor_id()] = y;
+ tlbflush_time[smp_processor_id()] = t2;
local_irq_restore(flags);
}
if ( unlikely((nx & PGT_count_mask) == 0) )
{
/* Record TLB information for flush later. Races are harmless. */
- page->tlbflush_timestamp = tlbflush_clock;
+ page->tlbflush_timestamp = tlbflush_current_time();
if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
likely(nx & PGT_validated) )
write_ptbase(&d->mm);
- put_page_and_type(&frame_table[old_base_pfn]);
-
- /*
- * Note that we tick the clock /after/ dropping the old base's
- * reference count. If the page tables got freed then this will
- * avoid unnecessary TLB flushes when the pages are reused. */
- tlb_clocktick();
+ put_page_and_type(&frame_table[old_base_pfn]);
}
else
{
int signal = 0;
struct pdb_breakpoint* bkpt;
int watchdog_save;
- unsigned long cr3;
-
- __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+ unsigned long cr3 = read_cr3();
    /* If the exception is an int3 from user space then pdb is only
       interested if it re-wrote the instruction to set the breakpoint.
/* No need for atomicity: we are the only possible updater. */
tlbflush_clock++;
-
- /* Finally, signal the end of the epoch-change protocol. */
- wmb();
- tlbflush_epoch_changing = 0;
-
- /* In case we got to the end of the next epoch already. */
- tlb_clocktick();
}
static void flush_tlb_all_pge_ipi(void* info)
#ifdef XEN_DEBUGGER
if ( pdb_initialized && (pdb_ctx.system_call != 0) )
{
- unsigned long cr3;
- __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
+ unsigned long cr3 = read_cr3();
if ( cr3 == pdb_ctx.ptbr )
pdb_linux_syscall_enter_bkpt(regs, error_code, ti);
}
for ( i = 0; i < (1 << order); i++ )
{
ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
- pg[i].tlbflush_timestamp = tlbflush_clock;
+ pg[i].tlbflush_timestamp = tlbflush_current_time();
pg[i].u.free.cpu_mask = 1 << d->processor;
list_del(&pg[i].list);
/*
* Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
- * Therefore, if the current TLB time and a previously-read timestamp differ
- * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock
- * has wrapped at least once and every CPU's TLB is guaranteed to have been
- * flushed meanwhile.
* This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
*/
-#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)
+#define TLBCLOCK_EPOCH_MASK ((1U<<20)-1)
/*
* 'cpu_stamp' is the current timestamp for the CPU we are testing.
{
/*
* Worst case in which a flush really is required:
- * CPU has not flushed since end of last epoch (cpu_stamp = 0x0000ffff).
- * Clock has run to end of current epoch (clock = 0x0001ffff).
- * Therefore maximum valid difference is 0x10000 (EPOCH_MASK + 1).
+ * 1. CPU has not flushed since end of last epoch.
+ * 2. Clock has run to end of current epoch.
+ * THEREFORE: Maximum valid difference is (EPOCH_MASK + 1).
* N.B. The clock cannot run further until the CPU has flushed once more
- * and updated its stamp to 0x1ffff, so this is as 'far out' as it can get.
+ * and updated to current time, so this is as 'far out' as it can get.
*/
return ((lastuse_stamp - cpu_stamp) <= (TLBCLOCK_EPOCH_MASK + 1));
}
-extern unsigned long tlbflush_epoch_changing;
+/*
+ * The least significant bit of the clock indicates whether an epoch-change
+ * is in progress. All other bits form the counter that is incremented on
+ * each clock tick.
+ */
extern u32 tlbflush_clock;
extern u32 tlbflush_time[NR_CPUS];
-extern void tlb_clocktick(void);
+#define tlbflush_current_time() tlbflush_clock
+
extern void new_tlbflush_clock_period(void);
+/* Read pagetable base. */
+static inline unsigned long read_cr3(void)
+{
+ unsigned long cr3;
+ __asm__ __volatile__ (
+ "mov"__OS" %%cr3, %0" : "=r" (cr3) : );
+ return cr3;
+}
+
+/* Write pagetable base and implicitly tick the tlbflush clock. */
+extern void write_cr3(unsigned long cr3);
+
/*
* TLB flushing:
*
* and page-granular flushes are available only on i486 and up.
*/
+#define __flush_tlb() \
+ do { \
+ unsigned long cr3 = read_cr3(); \
+ write_cr3(cr3); \
+ } while ( 0 )
+
#ifndef CONFIG_SMP
#define flush_tlb() __flush_tlb()
extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
extern void paging_init(void);
-#define __flush_tlb() \
- do { \
- __asm__ __volatile__ ( \
- "mov %%cr3, %%"__OP"ax; mov %%"__OP"ax, %%cr3" \
- : : : "memory", __OP"ax" ); \
- tlb_clocktick(); \
- } while ( 0 )
-
/* Flush global pages as well. */
#define __pge_off() \
else
pa = pagetable_val(mm->pagetable);
- __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (pa) : "memory" );
+ write_cr3(pa);
}
#define IDLE0_MM \